*----------------------------------------------------------------------------------------------------------	* 
* Silencing the Rails: A Study of the Noise-Safety Trade-off in Railway Quiet Zones                         *
* RESEARCHERS:		Emtiaz Hritan																		    *
* PROGRAMMED BY:	Emtiaz Hritan 					 											            *
* CREATED:			Oct. 25, 2023																		   	*
* LAST MODIFIED:	Oct. 7, 2025														       				*
*----------------------------------------------------------------------------------------------------------	*

clear all
set more off
* Set local paths
* Set this local datapath equal to the folder location for data
	local datapath "C:\Users\name\Replication Files Silencing the Rails\Data"
* Set this local outputpath equal to the folder location for outcome like tables
	local outputpath "C:\Users\name\Replication Files Silencing the Rails\Outcome"

* Install necessary packages
ssc install reghdfe
ssc install ftools
ssc install grstyle, replace
ssc install palettes, replace
ssc install colrspace, replace
ssc install ppmlhdfe, replace 
ssc install jwdid, replace 
ssc install csdid, replace 
ssc install drdid, replace 
ssc install did_imputation, replace
ssc install frause, replace
frause mpdta.dta, clear
ssc install hdfe, replace 
ssc install event_plot, replace
ssc install addplot, replace


* Clear everything
clear all
set maxvar 120000

****************************************************************************
* Table 3—: Robustness checks: Alternative Staggered DID Models
****************************************************************************
cd "`datapath'"

******* TWFE(PPML): Column (1) ***********
use final 
// Calculate the count of accidents per railway crossing per year
egen accidents_per_year = total(accident) if !missing(year), by(crossing_id year)

* Let's get the date of whistle ban in stata format
gen whistle_year=year(WhistleDate)

* Crossing IDs are string.Let's change it:
egen crossingid = group(crossing_id)
duplicates report crossingid year
duplicates drop crossingid year, force 

* Teatment is defined as the railway crossing that has the established quietzone
gen treatment =0 
replace treatment =1 if quietzone==1

* Let's define "gvar" first. gvar=0 if never treated and gvar = year first treated if ever treated.
gen first_treat= whistle_year
replace first_treat=0 if quietzone ==0 

* Keep only the important variable
keep accidents_per_year crossingid year first_treat treatment 
* Let's balance this data using tsfill
xtset crossingid year
tsfill, full
replace accidents_per_year=0 if accidents_per_year==.
* Let's replace first_treat
// Sort your dataset by id and year
sort crossingid year

// Create a new variable that checks if income is zero for any year for each id
egen nonmissing_other_year = min(first_treat), by(crossingid)

// Replace missing values of 'other_year' with non-missing values for the same id
replace first_treat = nonmissing_other_year if missing(first_treat)

// Drop the temporary variable nonmissing_other_year
drop nonmissing_other_year

*Do the same for treatment variable
// Create variables indicating if there is any observation with treatment=1 or treatment=0 within the same crossing_id
egen any_treatment_1 = max(treatment == 1), by(crossingid)
egen any_treatment_0 = max(treatment == 0), by(crossingid)

// Replace missing treatment values with 1 if any_treatment_1 is 1
replace treatment = 1 if missing(treatment) & any_treatment_1 == 1

// Replace missing treatment values with 0 if any_treatment_0 is 1
replace treatment = 0 if missing(treatment) & any_treatment_0 == 1

// Drop the temporary variables
drop any_treatment_1 any_treatment_0

* Generate time dummy variable- post for observations occuring after first time becoming "Quiet Zone"
gen post_time=0 
replace post_time= 1 if year > first_treat
replace post_time=0 if first_treat==0

* Create treated
gen dty = 0
replace dty =1 if treatment==1 & post_time==1

xtset crossingid year

* Drop data before November, 1994
drop if year< 1995

eststo:ppmlhdfe accidents_per_year dty, a(crossingid year) vce(cluster crossingid)
estimates store dtwfe

******* TWFE(PPML): Column (2) ***********

*Lets merge this with state county data to have state fixed effects
merge m:1 crossingid using state_county.dta, force 
drop _merge
eststo:ppmlhdfe accidents_per_year dty, a(crossingid year statecode#year) vce(cluster crossingid)


******* CSDID: Column (3) ***********

* Clear everything
clear all
set maxvar 120000

clear all
use final 
// Calculate the count of accidents per railway crossing per month
egen accidents_per_year = total(accident) if !missing(year), by(crossing_id year)
gen whistle_year=year(WhistleDate)

* Crossing IDs are string.Let's change it:

egen crossingid = group(crossing_id)

duplicates report crossingid year
duplicates drop crossingid year, force 
 
* Let's define "gvar" first. gvar=0 if never treated and gvar = year first treated if ever treated.
gen first_treat= whistle_year
replace first_treat=0 if quietzone ==0 

* Keep only the important variable
keep accidents_per_year crossingid year first_treat
* Let's balance this data using tsfill
xtset crossingid year
tsfill, full
replace accidents_per_year=0 if accidents_per_year==.
* Let's replace first_treat
// Sort your dataset by id and year
sort crossingid year

// Create a new variable that checks if income is zero for any year for each id
egen nonmissing_other_year = min(first_treat), by(crossingid)

// Replace missing values of 'other_year' with non-missing values for the same id
replace first_treat = nonmissing_other_year if missing(first_treat)

// Drop the temporary variable nonmissing_other_year
drop nonmissing_other_year

xtset crossingid year

* Drop data before November, 1994
drop if year< 1995


* Using csdid package 

csdid accidents_per_year, ivar(crossingid) time(year) gvar(first_treat)
estat all

******* CSDID: Column (4) ***********

*Not yet treated 
csdid accidents_per_year, ivar(crossingid) time(year) gvar(first_treat) notyet
estat all



******* BJS: Column (5) ***********

* Using did_imputation package based on Borusyak, Kirill, Xavier Jaravel, and Jann Spiess (2023). "Revisiting Event Study Designs: Robust and Efficient Estimation," Working paper.
 gen Ei= first_treat
 replace Ei=. if Ei==0
did_imputation accidents_per_year crossingid year Ei, fe(year)


******* BJS: Column (6) ***********
did_imputation accidents_per_year crossingid year Ei, fe(crossingid year)
estimates store bjs


******* BJS: Column (7) ***********

merge m:1 crossingid using state_county.dta, force 
drop _merge
did_imputation accidents_per_year crossingid year Ei, fe(crossingid year statecode#year)


******* ETWM: Column (8) ***********


****First I use jwdid package to implmemnt extended two-way Mundlak (ETWM) regression of "Wooldridge, Jeffrey. 2021.  Two-Way Fixed Effects, the Two-Way Mundlak Regression, and Differences-in-Differences estimators. Working paper."
*Estimation of ATTGTs without controls using not-yet treated groups
jwdid accidents_per_year, ivar(crossingid) tvar(year) gvar(first_treat) method(poisson)
estat simple
estimates store jwdid_notyet


******* ETWM: Column (9) ***********
*Using Never treated as controls
jwdid accidents_per_year, ivar(crossingid) tvar(year) gvar(first_treat) method(poisson) never 
estat simple
estimates store jwdid_never

eststo clear


*--------------------------------------------------------------------------
* END
*--------------------------------------------------------------------------


****************************************************************************
*  Figure A10. and Figure A11: Event study
****************************************************************************
* Clear everything
clear all
set maxvar 120000

* Use the accident, all railway crossing and all quietzone dataset "final"
use final 

// Calculate the count of accidents per railway crossing per month
egen accidents_per_year = total(accident) if !missing(year), by(crossing_id year)
gen whistle_year=year(WhistleDate)

* Crossing IDs are string.Let's change it:

egen crossingid = group(crossing_id)

duplicates report crossingid year
duplicates drop crossingid year, force 
 
* Let's define "gvar" first. gvar=0 if never treated and gvar = year first treated if ever treated.
gen first_treat= whistle_year
replace first_treat=0 if quietzone ==0 

* Keep only the important variable
keep accidents_per_year crossingid year first_treat
* Let's balance this data using tsfill
xtset crossingid year
tsfill, full
replace accidents_per_year=0 if accidents_per_year==.
* Let's replace first_treat
// Sort your dataset by id and year
sort crossingid year

// Create a new variable that checks if income is zero for any year for each id
egen nonmissing_other_year = min(first_treat), by(crossingid)

// Replace missing values of 'other_year' with non-missing values for the same id
replace first_treat = nonmissing_other_year if missing(first_treat)

// Drop the temporary variable nonmissing_other_year
drop nonmissing_other_year

xtset crossingid year

* Drop data before November, 1994
drop if year< 1995

************* Panel (a )Model proposed by Callaway and Sant'Anna(2021):Never Treated *********************
csdid accidents_per_year, ivar(crossingid) time(year) gvar(first_treat)
estat event, window(-14 14) estore(cs1)
estimates save ecs1, replace
csdid_plot, xtitle("Years since the quietzone") ytitle("Average effect on accidents")
graph export "csdid_never_14.pdf", replace


************* Panel (b) Model proposed by Callaway and Sant'Anna(2021): Not yet treated *********************
csdid accidents_per_year, ivar(crossingid) time(year) gvar(first_treat) notyet
estat event, window(-14 14) estore(cs2)
estimates save ecs2, replace
csdid_plot, xtitle("Years since the quietzone") ytitle("Average effect on accidents")
graph export "csdid_notyet_14.pdf", replace

************* Figure A11.:Event study *********************
* Using did_imputation package based on Borusyak, Kirill, Xavier Jaravel, and Jann Spiess (2023). "Revisiting Event Study Designs: Robust and Efficient Estimation," Working paper.

gen Ei= first_treat
replace Ei=. if Ei==0
did_imputation accidents_per_year crossingid year Ei, horizons(0/14) pretrend(14) minn(0)
estimates store bjs1 
estimates save ebjs, replace
event_plot, default_look graph_opt(xtitle("Years since the quietzone") ytitle("Average effect on accidents") xlabel(-14(7)14 14) legend(position(bottom)))
graph export "bjs_14.pdf", replace
eststo clear


*--------------------------------------------------------------------------
* END
*--------------------------------------------------------------------------

****************************************************************************
*  Table 4—: Robustness checks: County Level Analysis
****************************************************************************

clear all
use county_accidents_controls
* Count the number of unique crossings for each county-year combination
bysort county_fips year (crossingid): gen num_crossings = _N if _n == 1

* Carry forward the count within each county-year combination
bysort county_fips year: replace num_crossings = num_crossings[1]


* Step 1: Mark each crossing as a unique quiet zone crossing for each year
bysort county_fips year crossingid: gen unique_quietzone = dty == 1 & _n == 1

* Step 2: Count unique quiet zones per county-year combination
bysort county_fips year: egen num_quiet_zones = total(unique_quietzone)

* Generate nonwhite population
gen non_white= tot_pop - white_pop


* Collapse the data to get unique observations for each county and year, summing up by each category
collapse (sum) tot_pop tot_female non_white accidents_per_year num_crossings num_quiet_zones, by(county_fips year)

*Generate the ratio of quiet zones to total crossings
gen quiet_zone_ratio = num_quiet_zones / num_crossings

*** Table 4—: Robustness checks: Accidents (Count) Column (1) ***


eststo:ppmlhdfe accidents_per_year quiet_zone_ratio, a(county year) vce(cluster county)
estimates store dtwfe

*** Table 4—: Robustness checks: Accidents (Count) Column (2) ***


local controls "tot_pop tot_female non_white" 
eststo:ppmlhdfe accidents_per_year quiet_zone_ratio `controls', a(county year) vce(cluster county)
estimates store dtwfe2

*** Table 4—: Robustness checks: Accidents (per 100k Pop) Column (3) ***


*Generate the number of accidents per 100,000 population
gen accidents_per_100k = (accidents_per_year / tot_pop) * 100000
gen ln_accidents_per_100k = log(accidents_per_100k + 1)
eststo:reghdfe ln_accidents_per_100k quiet_zone_ratio, a(county year) vce(cluster county)
estimates store dtwfe3
eststo clear

*--------------------------------------------------------------------------
* END
*--------------------------------------------------------------------------